# Load the raw survey export into `data`.
# Besides empty cells and "NA", the listed response strings are read as missing.
# NOTE(review): "yes" is also read as missing in every column -- confirm this
# is intentional and does not blank out a real answer somewhere.
data <- read_csv(
  "data/raw_data/raw_data.csv",
  na = c("", "NA", "prefer not to say", "refuse to answer", "yes"),
  show_col_types = FALSE
)

# Give the question-numbered columns (Q3, Q4, ...) descriptive names.
# Only names change here; values are recoded in a later step.
data <- rename(
  data,
  # Respondent background
  age = Q3, sex = Q4, education = Q5,
  hispanic = Q7, race = Q8,
  income = Q12, hhsize = Q13,
  # Politics
  party = Q14, ideology = Q15,
  voted_2016 = Q16, vote_choice = Q17,
  # Views and policy-support items (police / poverty versions)
  race_view_police = Q21, race_view_poverty = Q26,
  support_police_policy = Q23, support_poverty_policy = Q28
)

# Recode the renamed survey columns into analysis variables.
#
# Conventions used below:
#   * Indicator variables are 1/0 and are NA when the source answer is missing,
#     because `==` comparisons propagate NA through ifelse().
#   * The meaning of the numeric answer codes (e.g. sex == 2, party == 1) comes
#     from the survey codebook, which is not part of this script.
data <- mutate(.data = data,
               ## Demographics
               age = as.numeric(age),
               # Ages below 18 or at/above 90 are set to missing
               age = ifelse(test = age < 18 | age >= 90, yes = NA_real_, no = age),
               female = ifelse(test = sex == 2, yes = 1, no = 0),
               college_plus = ifelse(test = education >= 5, yes = 1, no = 0),
               hispanic = ifelse(test = hispanic == 1, yes = 1, no = 0),
               white = ifelse(test = race == 1, yes = 1, no = 0),
               black = ifelse(test = race == 2, yes = 1, no = 0),
               hhsize = as.numeric(hhsize),

               ## Party identification
               republican = ifelse(test = party == 1, yes = 1, no = 0),
               democrat = ifelse(test = party == 3, yes = 1, no = 0),
               independent = ifelse(test = party == 2, yes = 1, no = 0),

               ## Ideology
               # Use `==` rather than `%in%`: `%in%` never returns NA, so a
               # missing ideology would be coded 0 here while `moderate` is NA.
               # With `==` all three indicators are NA for missing ideology.
               conservative = ifelse(test = ideology == 1 | ideology == 2, yes = 1, no = 0),
               moderate = ifelse(test = ideology == 3, yes = 1, no = 0),
               liberal = ifelse(test = ideology == 4 | ideology == 5, yes = 1, no = 0),

               ## 2016 vote
               voted_2016 = ifelse(test = voted_2016 == 1, yes = 1, no = 0),
               voted_clinton = ifelse(test = vote_choice == 1, yes = 1, no = 0),
               voted_trump = ifelse(test = vote_choice == 2, yes = 1, no = 0),

               ## Outcome items, one per policy version
               support_police_policy = as.numeric(support_police_policy),
               support_poverty_policy = as.numeric(support_poverty_policy),

               ## Treatment indicators derived from Q20 / Q25 / Q30 / Q32
               # NOTE(review): these four columns look like one flag per
               # experimental arm -- confirm against the survey design.
               # case_when() uses the first matching condition; rows where none
               # of the four equals 1 (or all are missing) get NA.
               deandre = case_when(
                 Q20 == 1 ~ 0,
                 Q25 == 1 ~ 0,
                 Q30 == 1 ~ 1,
                 Q32 == 1 ~ 1,
                 TRUE ~ NA_real_
               ),
               police = case_when(
                 Q20 == 1 ~ 1,
                 Q25 == 1 ~ 0,
                 Q30 == 1 ~ 1,
                 Q32 == 1 ~ 0,
                 TRUE ~ NA_real_
               ),

               ## Recode race_view_police and race_view_poverty to -1, 0, 1 scale
               # Mapping: 1 -> 0, 2 -> 1, 3 -> -1. The steps run in sequence;
               # this is safe because no step's output (0, 1, -1) is matched by
               # a later step's input (2, 3).
               race_view_police = ifelse(test = race_view_police == 1, yes = 0, no = race_view_police),
               race_view_police = ifelse(test = race_view_police == 2, yes = 1, no = race_view_police),
               race_view_police = ifelse(test = race_view_police == 3, yes = -1, no = race_view_police),

               race_view_poverty = ifelse(test = race_view_poverty == 1, yes = 0, no = race_view_poverty),
               race_view_poverty = ifelse(test = race_view_poverty == 2, yes = 1, no = race_view_poverty),
               race_view_poverty = ifelse(test = race_view_poverty == 3, yes = -1, no = race_view_poverty),

               ## Single outcome: the poverty item when answered, otherwise
               ## the police item
               support_policy = coalesce(support_poverty_policy, support_police_policy),
               # Recode support policy to -1, 0, 1 scale:
               # 1 stays 1 (no recode needed), 2 -> 0, 3 -> -1
               support_policy = ifelse(test = support_policy == 2, yes = 0, no = support_policy),
               support_policy = ifelse(test = support_policy == 3, yes = -1, no = support_policy))

# Keep only the analysis variables, in this column order; every other column
# (raw Q-columns and intermediate variables) is dropped.
data <- select(
  data,
  # Demographics
  age, female, college_plus, hispanic, white, black, income, hhsize,
  # Party and ideology
  republican, democrat, independent,
  conservative, moderate, liberal,
  # 2016 vote
  voted_2016, voted_clinton, voted_trump,
  # Views, outcome, and treatment indicators
  race_view_police, race_view_poverty,
  support_policy,
  deandre, police
)

# Keep only rows with a recorded outcome (support_policy is not NA)
data <- filter(data, !is.na(support_policy))

## Write the cleaned dataset to an .RData file; load() restores it under the
## name `data`
save(list = "data", file = "data/data.RData")

## Remove the object from the workspace and run garbage collection
rm(list = "data")
gc()